{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "75417ef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2dbd4a89",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Plate ID</th>\n",
       "      <th>Registration State</th>\n",
       "      <th>Vehicle Body Type</th>\n",
       "      <th>Vehicle Make</th>\n",
       "      <th>Violation Time</th>\n",
       "      <th>Street Name</th>\n",
       "      <th>Violation Legal Code</th>\n",
       "      <th>Vehicle Color</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>J58JKX</td>\n",
       "      <td>NJ</td>\n",
       "      <td>SDN</td>\n",
       "      <td>HONDA</td>\n",
       "      <td>0523P</td>\n",
       "      <td>43 ST</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>KRE6058</td>\n",
       "      <td>PA</td>\n",
       "      <td>SUBN</td>\n",
       "      <td>ME/BE</td>\n",
       "      <td>0428P</td>\n",
       "      <td>UNION ST</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BLK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>444326R</td>\n",
       "      <td>NJ</td>\n",
       "      <td>SDN</td>\n",
       "      <td>LEXUS</td>\n",
       "      <td>0625A</td>\n",
       "      <td>CLERMONT AVENUE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BLACK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F728330</td>\n",
       "      <td>OH</td>\n",
       "      <td>SDN</td>\n",
       "      <td>CHEVR</td>\n",
       "      <td>1106A</td>\n",
       "      <td>DIVISION AVE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>FMY9090</td>\n",
       "      <td>NY</td>\n",
       "      <td>SUBN</td>\n",
       "      <td>JEEP</td>\n",
       "      <td>1253A</td>\n",
       "      <td>GRAND ST</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GREY</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Plate ID Registration State Vehicle Body Type Vehicle Make Violation Time  \\\n",
       "0   J58JKX                 NJ               SDN        HONDA          0523P   \n",
       "1  KRE6058                 PA              SUBN        ME/BE          0428P   \n",
       "2  444326R                 NJ               SDN        LEXUS          0625A   \n",
       "3  F728330                 OH               SDN        CHEVR          1106A   \n",
       "4  FMY9090                 NY              SUBN         JEEP          1253A   \n",
       "\n",
       "       Street Name Violation Legal Code Vehicle Color  \n",
       "0            43 ST                  NaN            BK  \n",
       "1         UNION ST                  NaN           BLK  \n",
       "2  CLERMONT AVENUE                  NaN         BLACK  \n",
       "3     DIVISION AVE                  NaN           NaN  \n",
       "4         GRAND ST                  NaN          GREY  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the NYC parking violations data into memory.  Only\n",
    "# load the following columns:'Plate ID',  'Registration State', \n",
    "# 'Vehicle Make', 'Vehicle Color', 'Vehicle Make', \n",
    "# Violation Time', 'Street Name', 'Violation Legal Code', \n",
    "\n",
    "filename = '../data/nyc-parking-violations-2020.csv'\n",
    "\n",
    "df = pd.read_csv(filename, \n",
    "                 usecols=['Plate ID', \n",
    "                          'Registration State',\n",
    "                          'Vehicle Make',\n",
    "                          'Vehicle Color',\n",
    "                          'Vehicle Body Type', \n",
    "                          'Violation Time', \n",
    "                          'Street Name',\n",
    "                          'Violation Legal Code'])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ce36295c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5235557375"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How much memory, total, is being used by just these columns?\n",
    "orig_mem = df.memory_usage(deep=True).sum()\n",
    "orig_mem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2f0dee36",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Categorizing Plate ID...\n",
      "\tDone.\n",
      "Categorizing Registration State...\n",
      "\tDone.\n",
      "Categorizing Vehicle Body Type...\n",
      "\tDone.\n",
      "Categorizing Vehicle Make...\n",
      "\tDone.\n",
      "Categorizing Violation Time...\n",
      "\tDone.\n",
      "Categorizing Street Name...\n",
      "\tDone.\n",
      "Categorizing Violation Legal Code...\n",
      "\tDone.\n",
      "Categorizing Vehicle Color...\n",
      "\tDone.\n"
     ]
    }
   ],
   "source": [
    "# Replace each of these columns with a category.\n",
    "\n",
    "for one_colname in df.columns:\n",
    "    print(f'Categorizing {one_colname}...')\n",
    "    df[one_colname] = df[one_colname].astype('category')\n",
    "    print('\\tDone.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2ac876bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Plate ID                category\n",
       "Registration State      category\n",
       "Vehicle Body Type       category\n",
       "Vehicle Make            category\n",
       "Violation Time          category\n",
       "Street Name             category\n",
       "Violation Legal Code    category\n",
       "Vehicle Color           category\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What types are your columns now?\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "12b4cc8f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "547944786"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How much memory, total, is being used after categories?\n",
    "new_mem = df.memory_usage(deep=True).sum()\n",
    "new_mem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0ac55768",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10465834805219759"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What proportion of the previous memory are you now using?\n",
    "new_mem /  orig_mem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0708027-971d-4034-ba1f-0c761086e2be",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
